/*
 *   Copyright (c) 2018. 刘路 All rights reserved
 *   版权所有 刘路 并保留所有权利 2018.
 *   ===============================================================
 *   这不是一个自由软件！您只能在不用于商业目的的前提下对程序代码进行修改和
 *   使用。不允许对程序代码以任何形式任何目的的再发布。如果项目发布携带作者
 *   认可的特殊 LICENSE 则按照 LICENSE 执行，废除上面内容。请保留原作者信息。
 *   ================================================================
 *   刘路（feedback@zhoyq.com）于 2018. 创建
 *   http://zhoyq.com
 */

package com.zhoyq.helper.text;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.IOException;
import java.nio.file.FileSystems;
import java.util.ArrayList;
import java.util.List;

/**
 * Helper class for full-text search with Lucene: building an index, parsing
 * queries, highlighting matches and running paged searches.
 *
 * <p>All methods are static. Most of them report I/O failures by logging a
 * warning and returning {@code null} (or returning normally for
 * {@link #createIndex}) rather than throwing.
 *
 * @author 刘路
 */
public class LuceneHelper {

  public static Logger log = LoggerFactory.getLogger(LuceneHelper.class);

  /** The empty string. The misspelled name is kept for backward compatibility. */
  public static final String EMPTY_STING = "";
  
  /**
   * Creates the IK analyzer used for tokenizing text.
   *
   * @return a new {@code IKAnalyzer} constructed with {@code true}
   *         (NOTE(review): presumably IK's "smart" segmentation switch — confirm against the IK Analyzer version in use)
   */
  public static Analyzer createIKAnalyzer(){
    return new IKAnalyzer(true);
  }
  
  /**
   * Rebuilds the index stored at {@code indexPath} from the given documents.
   * Any previously indexed documents are removed first.
   *
   * <p>Example of preparing a document:
   * <pre>
   *   Field id = new TextField(
   *       "lecture_id",
   *       lecture.getInt("lecture_id").toString(),
   *       Field.Store.YES);
   *   document.add(id);
   *   docList.add(document);
   * </pre>
   *
   * <p>An {@link IOException} is not propagated; it is logged as a warning.
   * When writing fails part-way the writer is rolled back, so the index on
   * disk is left as it was and the write lock is released.
   *
   * @param analyzer  analyzer used to tokenize the documents
   * @param docList   documents to write into the index
   * @param indexPath file-system path of the index directory
   */
  public static void createIndex(Analyzer analyzer,List<Document> docList,String indexPath) {
    IndexWriterConfig cfg = new IndexWriterConfig(analyzer);
    // try-with-resources guarantees the directory is closed on every path.
    try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(indexPath))) {
      IndexWriter writer = new IndexWriter(directory, cfg);
      try {
        // Drop the previous index content before adding the new documents.
        writer.deleteAll();
        for (Document doc : docList) {
          writer.addDocument(doc);
        }
        // Closing the writer commits the changes.
        writer.close();
      } catch (IOException | RuntimeException e) {
        // Discard the partial changes and release the write lock instead of
        // leaving the writer open.
        try {
          writer.rollback();
        } catch (IOException | RuntimeException rollbackFailure) {
          e.addSuppressed(rollbackFailure);
        }
        throw e;
      }
    } catch (IOException e) {
      log.warn(e.getMessage(), e);
    }
  }
  
  /**
   * Builds a document containing the given fields, in the given order.
   *
   * @param fields fields to add to the document
   * @return a new document holding all of {@code fields}
   */
  public static Document toDoc(Field ... fields){
    Document doc = new Document();
    for(Field f : fields){
      doc.add(f);
    }
    return doc;
  }
  
  /**
   * Parses {@code keyword} into a query against the field {@code searchTag}.
   *
   * <p>The keyword is handed to Lucene's classic {@link QueryParser} as
   * {@code searchTag:keyword}, so it is interpreted as Lucene query syntax
   * (operators such as AND / OR must be written in upper case).
   *
   * @param analyzer  analyzer used to tokenize the query; it should be the
   *                  same analyzer that was used when the index was built
   * @param searchTag name of the field to search (also the parser's default field)
   * @param keyword   query text
   * @return the parsed query, or {@code null} when {@code searchTag} or
   *         {@code keyword} is null or empty, or when the text cannot be parsed
   */
  public static Query createQuery(Analyzer analyzer,String searchTag,String keyword) {
    if(keyword==null || searchTag == null 
        || EMPTY_STING.equals(searchTag)
        || EMPTY_STING.equals(keyword)){
      return null;
    } 
    QueryParser parser = new QueryParser(searchTag, analyzer);
    try {
      return parser.parse(searchTag+":"+keyword);
    } catch (ParseException e) {  
      log.warn(e.getMessage(), e);
      return null;
    }
  }
  
  /**
   * Creates a highlighter that wraps terms matching {@code query} in the
   * given markup.
   *
   * @param query    query whose matching terms are highlighted
   * @param warpHtml markup template of the form {@code prefix*suffix}, e.g.
   *                 {@code <b>*</b>}; the text before the first {@code *} is
   *                 emitted before each match and the text between the first
   *                 and second {@code *} (or up to the end) after it. Either
   *                 part may be empty.
   * @return a highlighter using a span-based fragmenter scored by {@code query}
   * @throws IllegalArgumentException if {@code warpHtml} is null or contains no {@code *}
   */
  public static Highlighter createHighlight( Query query,String warpHtml){
    if (warpHtml == null) {
      throw new IllegalArgumentException(
          "warpHtml must not be null; expected the form \"prefix*suffix\"");
    }
    // Limit -1 keeps trailing empty parts, so "<b>*" yields {"<b>", ""}
    // instead of a one-element array.
    String[] warpArr = warpHtml.split("\\*", -1);
    if (warpArr.length < 2) {
      throw new IllegalArgumentException(
          "warpHtml must contain '*' separating prefix and suffix, got: " + warpHtml);
    }
    QueryScorer scorer = new QueryScorer(query);
    // The fragmenter selects the excerpt that contains the matched terms.
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
        warpArr[0], warpArr[1]);
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
    highlighter.setTextFragmenter(fragmenter);
    return highlighter;
  }
  
  /**
   * Returns the best highlighted fragment of one stored field of a document.
   *
   * @param analyzer    analyzer used to tokenize the field text
   * @param highlighter highlighter, e.g. from {@link #createHighlight}
   * @param doc         document to read the field from
   * @param tag         name of the field to highlight
   * @return the highlighted fragment as returned by
   *         {@code Highlighter.getBestFragment}, or {@code null} when the
   *         document has no value for {@code tag} or highlighting fails
   */
  public static String highlight(
      Analyzer analyzer,Highlighter highlighter,
      Document doc,String tag){
    String text = doc.get(tag);
    if (text == null) {
      // Nothing stored under this field name, so there is nothing to highlight.
      return null;
    }
    try {
      return highlighter.getBestFragment(analyzer,tag, text);
    } catch (IOException | InvalidTokenOffsetsException e) {
      log.warn(e.getMessage(), e);
      return null;
    } 
  }
  
  /**
   * Opens the index at {@code indexPath} for searching.
   *
   * @param indexPath file-system path of the index directory
   * @return a searcher over the index, or {@code null} if the index cannot be
   *         opened (the failure is logged as a warning)
   */
  public static IndexSearcher createIndexSearcher(String indexPath){
    Directory directory = null;
    try {
      directory = FSDirectory.open(FileSystems.getDefault().getPath(indexPath));
      return new IndexSearcher(DirectoryReader.open(directory));
    } catch (IOException e) { 
      log.warn(e.getMessage(), e);
      // The reader was never created, so nothing else will close the directory.
      if (directory != null) {
        try {
          directory.close();
        } catch (IOException closeFailure) {
          log.warn(closeFailure.getMessage(), closeFailure);
        }
      }
      return null;
    }
  }
  
  /**
   * Runs {@code query} and returns one page of results.
   *
   * <p>The top {@code pageNumber * pageSize} hits are fetched and the last
   * page-sized slice of them is converted with {@code doca.setFields}.
   *
   * <p>This method takes ownership of {@code searcher}: its index reader is
   * closed before the method returns, on every path, so the searcher cannot be
   * reused afterwards.
   *
   * @param pageNumber 1-based page number
   * @param pageSize   number of results per page
   * @param analyzer   not used by this method
   * @param query      query to run
   * @param searcher   searcher to run the query on
   * @param doca       converts each matching document into a result item
   * @param <T>        type of the result items
   * @return the page of results together with paging information (page
   *         number, page size, total page count, total hit count), or
   *         {@code null} when {@code query} or {@code searcher} is null or
   *         the search fails
   * @throws IllegalArgumentException if {@code pageNumber} or {@code pageSize}
   *         is null or less than 1
   */
  public static <T> SearchResult<T> searchIndex(
      Integer pageNumber,Integer pageSize,
      Analyzer analyzer,Query query,IndexSearcher searcher,
      DocAnalyzerWithHighlight doca){ 
    
    if(query == null || searcher==null) {
      log.warn("搜索失败 丢失参数 query searcher");
      closeReader(searcher);
      return null;
    }

    Integer count = 0;
    List<T> list = new ArrayList<T>();

    try {
      if (pageNumber == null || pageSize == null || pageNumber < 1 || pageSize < 1) {
        throw new IllegalArgumentException(
            "pageNumber and pageSize must be positive, got pageNumber="
                + pageNumber + ", pageSize=" + pageSize);
      }

      TopDocs topDocs = null;
      try {
        topDocs = searcher.search(query, pageNumber*pageSize);
      } catch (IOException e) {  
        log.warn(e.getMessage(), e);
      }
      if(topDocs==null){
        log.warn("搜索失败 未检索到内容");
        return null;
      }

      // Total number of matches, not just the ones fetched.
      count = topDocs.totalHits;
      // The first pageNumber*pageSize hits; only the last page of them is used.
      ScoreDoc[] scoreDocs = topDocs.scoreDocs;

      int start = (pageNumber-1) * pageSize;
      int end   = Math.min(pageNumber * pageSize, scoreDocs.length);

      for(int i=start;i<end;i++){
        int docId = scoreDocs[i].doc;
        Document doc = null;
        try {
          doc = searcher.doc(docId);
        } catch (IOException e) {
          log.warn(e.getMessage(), e);
        }
        if(doc==null){
          log.warn("未查询到文档");
        }else{
          T t = doca.setFields(doc);
          list.add(t);  
        }
      }
    } finally {
      // Release the reader on success, on failure and on early return alike.
      closeReader(searcher);
    }

    // Raw construction is kept because SearchResult's constructor signature
    // is declared outside this file.
    @SuppressWarnings({"unchecked", "rawtypes"})
    SearchResult<T> sr = new SearchResult(list, pageNumber, pageSize,count%pageSize==0?count/pageSize:count/pageSize+1, count);
    return sr;
  } 

  /** Closes the index reader behind {@code searcher}, logging instead of throwing; no-op for null. */
  private static void closeReader(IndexSearcher searcher) {
    if (searcher == null) {
      return;
    }
    try {
      searcher.getIndexReader().close();
    } catch (IOException e) {
      log.warn(e.getMessage(), e);
    }
  }
  
}
